******************************************************************************************************************************************************
*This do file creates the final analysis sample:
*	a.	Input files: 
*		i.	PCI_coded.dta
*		ii.	GSO_IV.dta
*		iii.GSO_IVnoncollapsed.dta
*		iv.	GSO_IVagg.dta
*		v.	GSO_IVregion.dta
*		vi. crosswalk_province.csv
*		vii.region.dta
*		viii.CH_YLS_0612.dta
*	b.	Output files:
*		i.	finalanalysissample.dta
******************************************************************************************************************************************************

* Start from an empty session before any data are loaded.
clear all
* Do not pause output with -more- prompts while the script runs.
set more off
* NOTE(review): -set mem- only matters in older Stata releases; newer releases
* manage memory automatically and ignore it. Confirm the target release.
set mem 600m
* Raise the variable limit; the interaction loops further down create many variables.
set maxvar 3000


*SET DIRECTORY HERE*
* Edit the three globals below to point at the local copies of the data folders.
* Each path already ends in a slash, and the commands that use these globals
* add another slash before the file name.
global dir_data_original /Users/Jie/Dropbox (Personal)/CorruptionIncome/EJFinalSubmission/data&program/original raw data/
global dir_data_coded /Users/Jie/Dropbox (Personal)/CorruptionIncome/EJFinalSubmission/data&program/intermediary data/
global dir_data_analysis /Users/Jie/Dropbox (Personal)/CorruptionIncome/EJFinalSubmission/data&program/analysis data/

* NOTE(review): -version- with no release number only reports the current
* version setting; it does not pin the interpreter to a release. Add the
* intended release number if version control was meant.
version 

**BUILD THE GSO (WITH IV) FILE USED IN THE PCI MERGE
* Reserve the temporary file that the PCI merge further down reads.
tempfile GSO
* Load the province crosswalk and match it directly against the GSO IV data,
* keeping only provinces found in both files.
insheet using "${dir_data_original}/Crosswalks/crosswalk_province.csv", comma clear
merge 1:m province using "${dir_data_analysis}/GSO_IV.dta", keep(match) nogenerate
* Discard rows missing any part of the pci_id-broad-year key.
drop if pci_id==. | year==. | broad==""
sort pci_id broad year
save `GSO', replace

* Build the aggregate GSO file used in the second PCI merge.
* Reserve the temporary file that the pci_id-year merge further down reads.
tempfile GSOagg
* Load the province crosswalk and match it directly against the aggregate
* GSO IV data, keeping only provinces found in both files.
insheet using "${dir_data_original}/Crosswalks/crosswalk_province.csv", comma clear
merge 1:m province using "${dir_data_analysis}/GSO_IVagg.dta", keep(match) nogenerate
* Discard rows missing either part of the pci_id-year key.
drop if pci_id==. | year==.
sort pci_id year
save `GSOagg', replace


**LOAD PCI DATA AND ATTACH BOTH GSO FILES
use "${dir_data_coded}/PCI_coded.dta", clear

* Move the key variables to the front and discard rows missing any of them.
order pci_id broad year
drop if pci_id==. | broad=="" | year==.

* First merge: GSO file keyed on pci_id, broad and year.
* Report the match result, then keep matched rows only.
sort pci_id broad year
merge m:1 pci_id broad year using `GSO'
tabulate _merge
keep if _merge==3
drop _merge

* Second merge: aggregate GSO file keyed on pci_id and year, same treatment.
sort pci_id year
merge m:1 pci_id year using `GSOagg'
tabulate _merge
keep if _merge==3
drop _merge

* Keep only rows where bribe_pctrev is not system-missing.
drop if bribe_pctrev==.

**ATTACH REGION FILE AND REGION-LEVEL GSO FILE
* Drop the region variable already in memory before merging the Crosswalks region file.
drop region
* Keep every row already in memory; rows found only in the region file are not added.
merge m:1 pci_id using "${dir_data_original}/Crosswalks/region", keep(master match) nogenerate

* Same rule for the region-broad-year file: unmatched rows from the using file are not added.
sort region broad year
merge m:1 region broad year using "${dir_data_analysis}/GSO_IVregion.dta", keep(master match) nogenerate

**GENERATE GROUP VARIABLES
* Remove the existing copies of these identifiers so they can be rebuilt below.
* NOTE(review): rjt and rt are created below without being dropped first, so
* they are presumably absent from the input data -- confirm.
drop r j t rj jt

* r copies pci_id, j is a numeric encoding of the string variable broad, t copies year.
g r=pci_id
encode broad,g(j)	
g t=year

* Integer group ids for each combination of the identifiers.
egen rjt = group(r j t)
egen rj = group(r j)
egen rt = group(r t)
egen jt = group(j t)

* Mean of employ_n within each j group and within each rj group, then logs
* of employ_n and of the two group means.
bys j: egen employj = mean(employ_n)
bys rj: egen employrj = mean(employ_n)
g lnemploy=ln(employ_n)
g lnemployj= ln(employj)
g lnemployrj= ln(employrj)

* Mean of bribe_pctrev within each rjt group, and a running row counter inside the group.
* NOTE(review): the sort is on rjt only, so which row receives counter value 1
* inside a group is not pinned down by this code -- confirm nothing relies on it.
bys rjt: egen bribe_pctrev_rjt = mean(bribe_pctrev)
bys rjt: gen temp_rjt=_n


**ATTACH CHINESE DATA BY BROAD AND YEAR (MATCHED ROWS ONLY)
* Rows without a counterpart in the other file are discarded, and no merge
* indicator variable is left behind.
merge m:1 broad year using "${dir_data_original}/CH_YLS_0612", keep(match) nogenerate


**CREATE INTERACTION VARIABLES
* One indicator variable per category of broad: inddummy1, inddummy2, ...
tab broad,g(inddummy)
* Number of categories just tabulated. The loops below run over this count
* instead of a hard-coded 18, so they stay in step with the indicators that
* -tab- actually created.
local nind = r(r)

* Variables interacted with lntotalemploy_r_jt and lntotalemploy_reg_jt.
local ctrlvars employj employrj lnemployj lnemployrj lurc lurclastyr ownland ownlandnlc ///
	lnprem pieces_d yrsopen numdocs sharedocs formerhhfirm operations opdich choose_other ///
	mobile ownergov ownersoe formerSOE govholdshare plurclastyr plurc pownland poperations ///
	popdich iz year

* Variables interacted with lnchtotalemploy_jt and lntotalemploy_rjt, and used
* in the triple interactions with the industry indicators.
local firmvars ownland lurc lurclastyr operations opdich lnemployj ///
	formerhhfirm formerSOE ownergov ownersoe sharedocs govholdshare lnprem yrsopen ///
	pownland plurclastyr poperations popdich

* Industry indicator X lntotalemploy_r_jt.
forvalues i=1/`nind'{
	g lntotalemploy_r_jtXinddummy`i'=lntotalemploy_r_jt*inddummy`i'
}

* Industry indicator X year.
forvalues i=1/`nind'{
	g yearXinddummy`i'=year*inddummy`i'
}

* Industry indicator X lnchtotalemploy_jt.
forvalues i=1/`nind'{
	g lnchtotalemploy_jtXinddummy`i'=lnchtotalemploy_jt*inddummy`i'
}

* Each variable in ctrlvars X lntotalemploy_r_jt.
foreach intervar of varlist `ctrlvars'{
	g lntotalemploy_r_jtX`intervar' = `intervar' * lntotalemploy_r_jt
}

* Each variable in ctrlvars X lntotalemploy_reg_jt.
foreach intervar of varlist `ctrlvars'{
	g lntotalemploy_regX`intervar' = `intervar' * lntotalemploy_reg_jt
}

* Each variable in firmvars X lnchtotalemploy_jt.
foreach v of varlist `firmvars'{
	g lnchtotalemploy_jtX`v'=lnchtotalemploy_jt*`v'
}

* Triple interaction: lntotalemploy_r_jt X firmvars variable X industry indicator.
foreach v of varlist `firmvars'{
	forvalues i=1/`nind'{
		g VNX`v'Xinddummy`i'=lntotalemploy_r_jt*`v'*inddummy`i'
	}
}

* Triple interaction: lnchtotalemploy_jt X firmvars variable X industry indicator.
foreach v of varlist `firmvars'{
	forvalues i=1/`nind'{
		g chX`v'Xinddummy`i'=lnchtotalemploy_jt*`v'*inddummy`i'
	}
}

* Each variable in firmvars X lntotalemploy_rjt.
foreach v of varlist `firmvars'{
	g lntotalemploy_rjtX`v'=lntotalemploy_rjt*`v'
}

**LABEL VARS
* Attach descriptive labels to the employment measures, selected controls and
* selected own-province interaction terms.
* NOTE(review): several labels below are longer than 80 characters, the
* documented maximum for a variable label -- confirm they display as intended.
label var lntotalemploy_r_jt "Log Vietnamese employment in industry-year (excluding own province)"
label var lntotalemploy_rjt "Log Vietnamese employment in industry-year (in own province)"
label var lnchtotalemploy_jt "Log Chinese employment in industry-year"
label var ownland "Firm owns its land"
label var lurc "Firm owns land and has LURC"
label var lntotalemploy_rjtXlurc "Firm owns land and has LURC X log Vietnamese employment in industry year (in own province)"
label var lntotalemploy_rjtXownland "Firm owns its land X log Vietnamese employment in industry year (in own province)"
* NOTE(review): the next label repeats the lurc interaction label word for word
* although the variable is built from lurclastyr -- confirm the intended text.
label var lntotalemploy_rjtXlurclastyr "Firm owns land and has LURC X log Vietnamese employment in industry year (in own province)"
label var lntotalemploy_rjtXoperations "Number of other provinces in which firm operates X log Vietnamese employment in industry-year (in own province)"
label var lntotalemploy_rjtXopdich "Firm currently operates in more than one province X log Vietnamese employment in industry-year (in own province)"
label var sharedocs "Share of registration documents held"
label var formerhhfirm "Former HH firm (dummy)"
label var formerSOE "Former SOE (dummy)"
label var ownergov "Owner is government official (dummy)"
label var govholdshare "Government holds positive share (dummy)"
label var lnprem "Log of business premise size (hectare)"
label var yrsopen "Years since establishment"

**SAVING
* Write the final analysis sample, overwriting any existing copy of the file.
save "${dir_data_analysis}/finalanalysissample.dta",replace


